/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Mar 9, 2011
 */

package com.bigdata.util.httpd;

import java.util.Enumeration;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.apache.log4j.Logger;

/**
 * This class provides a set of utilities for encoding and decoding HTTP
 * headers and doubles as the base class for all classes that
 * implement support for a specific HTTP header, such as {@link
 * LinkHeader}, {@link AcceptHeader}, etc.
 */

public class HTTPHeaderUtility
{

    /**
     * The {@link Logger} for HTTP header operations, including
     * parsing and serialization.  The {@link Logger} is named for
     * this class.  It should be used by all derived classes.
     */
    protected static final Logger log =
        Logger.getLogger( HTTPHeaderUtility.class );

    /*
     * Grammar used throughout this class (HTTP/1.1 basic rules):
     *
     * token      := One or more of any CHAR except CTLs or separators.
     *
     * CTL        := <any US-ASCII control character (octets 0 - 31) and
     *                DEL (127)>
     *
     * separators := "(" | ")" | "<" | ">" | "@"
     *             | "," | ";" | ":" | "\" | <">
     *             | "/" | "[" | "]" | "?" | "="
     *             | "{" | "}" | SP | HT
     */

    /**
     * The HTTP <code>separator</code> characters, in the order listed
     * by the grammar above (the last two are SP and HT).
     */
    private static final String HTTP_SEPARATORS =
        "()<>@,;:\\\"/[]?={} \t";

    /**
     * Splits a comma delimited list, consuming any whitespace on
     * either side of each comma.  Compiled once since
     * {@link #splitCommaList(String)} may be invoked per request.
     */
    private static final Pattern COMMA_LIST_SEPARATOR =
        Pattern.compile( "\\s*,\\s*" );

    /**
     * Matches an HTTP <code>token</code>, which consists of one or
     * more of any CHAR except <code>CTL</code> or
     * <code>separator</code>.
     */
    final protected static String httpTokenPattern =
        "[^\\p{Cntrl}\\(\\)<>@,;:\\\\\\\"/\\[\\]\\?=\\{\\}\\s\\x09]+";

    /**
     * The text for a {@link Pattern} matching an HTTP
     * <code>quoted-string</code>.<p>
     *
     * From HTTP/1.1:<p>
     *
     * A string of text is parsed as a single word if it is quoted
     * using double-quote marks.<p>
     *
     * The backslash character '\' MAY be used as a single-character
     * quoting mechanism only within quoted-string and comment
     * constructs.<p>
     *
     * <pre>
     * quoted-string = ( &lt;"&gt; *(qdtext | quoted-pair ) &lt;"&gt; )
     * qdtext        = &lt;any TEXT except &lt;"&gt;&gt;
     * quoted-pair   = "\" CHAR
     * TEXT          = &lt;any OCTET except CTLs, but including LWS&gt;
     * </pre>
     *
     * Note: This pattern text uses a non-capturing group to
     * encapsulate the choice between an escaped character
     * (quoted-pair: a backslash followed by any US-ASCII character)
     * and a legal unescaped character (anything that is not a control
     * character, a double quote or a backslash).  The two
     * alternatives never match the same input, so an escaped
     * backslash immediately before the closing quote can not be
     * mistaken for an escaped quote.  Any quoted-pair sequences must
     * be reversed by the consumer of the matched group, e.g., using
     * {@link #unquoteString(String)}.<p>
     *
     * Note: An unescaped horizontal tab is not matched since it is
     * covered by the control character class.<p>
     */
    final protected static String httpQuotedStringPattern =
        "\"(?:\\\\[\\x00-\\x7F]|[^\\p{Cntrl}\"\\\\])*\"";

    /**
     * Returns true iff the character is an HTTP <code>CTL</code>
     * character, i.e., one of the octets 0 - 31 or DEL (127).
     *
     * @param ch The character to be tested.
     */
    static public boolean isHttpCtlChar( char ch )
    {

        // A char is never negative, so only the upper bound matters.
        return ch <= 31 || ch == 127;

    }

    /**
     * Returns true iff the character is an HTTP
     * <code>separator</code> character.
     *
     * @param ch The character to be tested.
     */
    static public boolean isHttpSeparatorChar( char ch )
    {

        return HTTP_SEPARATORS.indexOf( ch ) >= 0;

    }

    /**
     * Short alias for {@link #httpTokenPattern}, used when composing
     * the larger patterns in {@link #init()}.
     */
    final protected static String tok = httpTokenPattern;

    /**
     * Returns true iff the {@link String} obeys the syntax rules
     * for an HTTP <code>token</code>: one or more characters, none
     * of which is a <code>CTL</code> or a <code>separator</code>.
     * The empty string is not a token.
     *
     * @param token The candidate token (must not be null).
     */
    static public boolean isHttpToken( String token )
    {

        final int len = token.length();

        if( len == 0 ) {

            // A token requires at least one character.
            return false;

        }

        for( int i = 0; i < len; i++ ) {

            final char ch = token.charAt( i );

            if( isHttpCtlChar( ch ) || isHttpSeparatorChar( ch ) ) {

                return false;

            }

        }

        return true;

    }

    /**
     * Short alias for {@link #httpQuotedStringPattern}, used when
     * composing the larger patterns in {@link #init()}.
     */
    final protected static String qs = httpQuotedStringPattern;

    /**
     * Pattern used to match the type/subtype of a MIME expression.
     * The matched groups are numbered by the opening parentheses in
     * the pattern.  The different matching groups are:<ul>
     *
     * <li> group(0) : the matched input.
     *
     * <li> group(1) : type
     *
     * <li> group(2) : subtype
     *
     * <li> group(3) : the rest of the input, to which you then apply
     * {@link #m_p2}.
     *
     * </ul>
     *
     * This field is null until {@link #init()} has been invoked.
     */
    static protected Pattern m_p1 = null;

    /**
     * Pattern used to match the optional parameters of a MIME
     * expression.  The matched groups are numbered by the opening
     * parentheses in the pattern. The source for the pattern is the
     * parameters as identified by {@link #m_p1}.  The different
     * matching groups are:<ul>
     *
     * <li> group( 0 ) : parameter ( attribute=value ).
     *
     * <li> group( 1 ) : attribute (parameter name).
     *
     * <li> group( 2 ) : value for that parameter.
     *
     * </ul>
     *
     * Note: You should match this pattern repeatedly until the input
     * is exhausted.<p>
     *
     * This field is null until {@link #init()} has been invoked.
     */
    static protected Pattern m_p2 = null;

    /**
     * Initialization for the shared {@link Pattern} objects
     * ({@link #m_p1} and {@link #m_p2}) that match valid MIME type
     * expressions.  Does nothing when both patterns have already
     * been compiled.<p>
     *
     * NOTE(review): this method is not synchronized and the pattern
     * fields are neither final nor volatile; confirm that callers do
     * not depend on safe concurrent first use.
     *
     * @throws AssertionError if a pattern can not be compiled.  The
     * underlying {@link PatternSyntaxException} is set as the cause.
     */
    static protected void init()
    {

        if( m_p1 != null && m_p2 != null ) {

            return;

        }

        try {

            m_p1 = Pattern.compile
                ( "^("+tok+")/("+tok+")(.*)$"
                  );

            m_p2 = Pattern.compile
                ( "\\s*;\\s*("+tok+")=("+tok+"|"+qs+")\\s*"
                  );

        } catch( PatternSyntaxException ex ) {

            /* Masquerade this exception so that it does not show up
             * on method signatures throughout this and other
             * packages.  The decision to use java.util.regex here is
             * an implementation decision and its exception signatures
             * should not be propagated.
             */

            final AssertionError err = new AssertionError
                ( "Could not compile regex patterns."
                  );

            err.initCause( ex );

            throw err;

        }

    }

    /**
     * Returns the MIME type parameter value as either an HTTP
     * <code>token</code> or HTTP <code>quoted-string</code> depending
     * on whether or not it contains any characters that need to be
     * escaped.  Every <code>CTL</code> and <code>separator</code>
     * character in the value is preceded by a backslash in the
     * result.  The empty string is always returned as the empty
     * <code>quoted-string</code> since it is not a legal
     * <code>token</code>.
     *
     * @param value The parameter value (must not be null).
     *
     * @param force When true the returned value is always a
     * <code>quoted-string</code>.
     *
     * @return The value, quoted and escaped when required.
     */
    static public String quoteString
        ( String value,
          boolean force
          )
    {

        final int len = value.length();

        // Room for the value plus the surrounding quotes.
        final StringBuilder sb = new StringBuilder( len + 2 );

        boolean didEscape = false;

        for( int i = 0; i < len; i++ ) {

            final char ch = value.charAt( i );

            if( isHttpCtlChar( ch ) || isHttpSeparatorChar( ch ) ) {

                sb.append( '\\' );

                didEscape = true;

            }

            sb.append( ch );

        }

        if( force || didEscape || len == 0 ) {

            return "\"" + sb + "\"";

        }

        return sb.toString();

    }

    /**
     * If the value is an HTTP <code>quoted-string</code> then we
     * strip off the quote characters and translate any escaped
     * characters into themselves, e.g., '\"' becomes '"'.  Otherwise
     * returns <i>value</i> unchanged.
     *
     * @param value The token or quoted-string (must not be null).
     *
     * @return The unquoted, unescaped value.
     *
     * @throws IllegalArgumentException If the value is a malformed
     * quoted string.  For example, no closing quote character
     * (including a value consisting of a single quote character) or
     * no character after an escape character.
     */
    static public String unquoteString( String value )
        throws IllegalArgumentException
    {

        /* Do nothing unless a quoted-string.
         */

        if( ! value.startsWith( "\"" ) ) {

            return value;

        }

        /* A lone quote character both starts and ends with a quote,
         * so the length is verified as well: the opening and closing
         * quotes must be distinct characters.
         */

        if( value.length() < 2 || ! value.endsWith( "\"" ) ) {

            throw new IllegalArgumentException
                ( "Quoted string does not end with '\"'"+
                  " : "+value
                  );

        }

        // Index of the closing quote; the content is [1, end).

        final int end = value.length() - 1;

        final StringBuilder sb = new StringBuilder( end );

        // Translate escaped characters.

        for( int i = 1; i < end; i++ ) {

            char ch = value.charAt( i );

            if( ch == '\\' ) {

                // Skip the escape and take the next character as is.

                i++;

                if( i >= end ) {

                    throw new IllegalArgumentException
                        ( "Escape character at end of string"+
                          " : "+value
                          );

                }

                ch = value.charAt( i );

            }

            sb.append( ch );

        }

        return sb.toString();

    }

    /**
     * HTTP permits headers whose grammar is a comma delimited list to
     * be specified multiple times in an HTTP request.  This method
     * properly combines the specified values into a {@link String}
     * containing a comma-delimited list that preserves the order in
     * which the header values were specified.
     *
     * @param values The header values, e.g., as returned by
     * <code>javax.servlet.http.HttpServletRequest.getHeaders( String
     * name )</code>.  Each element must be a {@link String}.  May be
     * null, which is treated like an empty enumeration.
     *
     * @param defaultValue IFF <i>values</i> is null or an empty
     * enumeration, then this value is returned to the caller.
     *
     * @return The values joined by ", ", or <i>defaultValue</i>.
     */
    public static String combineHeaders
        ( Enumeration<?> values,
          String defaultValue
          )
    {

        if( values == null || ! values.hasMoreElements() ) {

            return defaultValue;

        }

        final StringBuilder sb = new StringBuilder();

        sb.append( (String)values.nextElement() );

        while( values.hasMoreElements() ) {

            sb.append( ", " );

            sb.append( (String)values.nextElement() );

        }

        return sb.toString();

    }

    /**
     * Splits out the elements for an HTTP header value whose grammar
     * is a comma delimited list.  Whitespace around each comma and at
     * either end of the value is discarded.<p>
     *
     * Note: The value is split on every comma.  A comma appearing
     * inside of a <code>quoted-string</code> is not treated
     * specially.
     *
     * @param value The header value (must not be null).
     *
     * @return The elements of the list.
     */
    public static String[] splitCommaList
        ( String value
          )
    {

        final boolean debug = log.isDebugEnabled();

        if( debug ) {

            log.debug
                ( "Header-Value: "+value
                  );

        }

        // Trim first so the outermost elements are treated like the
        // inner ones, whose surrounding whitespace the separator eats.

        final String[] values = COMMA_LIST_SEPARATOR.split
            ( value.trim()
              );

        if( debug ) {

            log.debug
                ( "Found "+values.length+" elements in list."
                  );

        }

        return values;

    }

}
